


Install and set up your IDE
Create an IPUMS account here

View/create API key > Copy your key (you will use it later)

- read_nhgis(): use this when you have already downloaded the data to your device
- define_extract_nhgis(): specifies the data you want to request
- submit_extract(): submits the extract request via the API
- download_extract(): downloads the data to your computer
- ipums_var_info(): gets contextual information about variables

# A tibble: 266 × 4
name group description sequence
<chr> <chr> <chr> <int>
1 1790_cPop 1790 Census Population Data [US, States & Counties] 101
2 1800_cPop 1800 Census Population Data [US, States & Counties] 201
3 1810_cPop 1810 Census Population Data [US, States & Counties] 301
4 1820_cPop 1820 Census Population Data [US, States & Counties] 401
5 1830_cPop 1830 Census Population Data [US, States & Counties] 501
6 1840_cAg 1840 Census Agriculture Data [US, States & Counties] 601
7 1840_cMfg 1840 Census Manufacturing Data [US, States & Counties] 602
8 1840_cPopX 1840 Census Population & Other Data [US, States & Counti… 603
9 1850_cAg 1850 Census Agriculture Data [US, States & Counties] 701
10 1850_cPAX 1850 Census Population, Agriculture & Other Data [US, St… 702
# ℹ 256 more rows
library(dplyr)
# Show the datasets that belong to the 2023 American Community Survey group,
# keeping only each dataset's name and description.
# NOTE(review): `nhgis_data` here appears to hold the dataset metadata table
# (columns name, group, description, sequence) -- confirm where it is created.
nhgis_data |>
  dplyr::filter(group == "2023 American Community Survey") |>
  dplyr::select(name, description)
# A tibble: 5 × 2
name description
<chr> <chr>
1 2023_ACS1 1-Year Data
2 2019_2023_ACS5a 5-Year Data [2019-2023, Block Groups & Larger Areas]
3 2019_2023_ACS5b 5-Year Data [2019-2023, Tracts & Larger Areas]
4 2019_2023_ACS5c 5-Year Data [2019-2023, Summary by Residence 1 Year Ago]
5 2019_2023_ACS5d 5-Year Data [2019-2023, Summary by Place of Work]
$name
[1] "2019_2023_ACS5a"
$nhgis_id
[1] "ds267"
$group
[1] "2023 American Community Survey"
$description
[1] "5-Year Data [2019-2023, Block Groups & Larger Areas]"
$sequence
[1] 6202
$has_multiple_data_types
[1] TRUE
$data_tables
# A tibble: 416 × 7
name description universe nhgis_code sequence dataset_name n_variables
<chr> <chr> <chr> <chr> <int> <chr> <int>
1 B01001 Sex by Age Total p… ASNQ 1 2019_2023_A… 49
2 B01002 Median Age by … Total p… ASNR 2 2019_2023_A… 3
3 B01002A Median Age by … People … ASNS 3 2019_2023_A… 3
4 B01002B Median Age by … People … ASNT 4 2019_2023_A… 3
5 B01002C Median Age by … People … ASNU 5 2019_2023_A… 3
6 B01002D Median Age by … People … ASNV 6 2019_2023_A… 3
7 B01002E Median Age by … People … ASNW 7 2019_2023_A… 3
8 B01002F Median Age by … People … ASNX 8 2019_2023_A… 3
9 B01002G Median Age by … People … ASNY 9 2019_2023_A… 3
10 B01002H Median Age by … White a… ASNZ 10 2019_2023_A… 3
# ℹ 406 more rows
$geog_levels
# A tibble: 65 × 4
name description has_geog_extent_sele…¹ sequence
<chr> <chr> <lgl> <int>
1 nation Nation FALSE 1
2 region Region FALSE 2
3 division Division FALSE 3
4 state State FALSE 4
5 state_260 American Indian Area/Alaska Native… FALSE 5
6 state_290 American Indian Area/Alaska Native… FALSE 6
7 state_311 Metropolitan Statistical Area/Micr… FALSE 9
8 state_315 Metropolitan Statistical Area/Micr… FALSE 10
9 state_331 Combined Statistical Area--State FALSE 11
10 state_333 Combined Statistical Area--Metropo… FALSE 12
# ℹ 55 more rows
# ℹ abbreviated name: ¹has_geog_extent_selection
$geographic_instances
# A tibble: 52 × 2
name description
<chr> <chr>
1 010 Alabama
2 020 Alaska
3 040 Arizona
4 050 Arkansas
5 060 California
6 080 Colorado
7 090 Connecticut
8 100 Delaware
9 110 District of Columbia
10 120 Florida
# ℹ 42 more rows
$breakdowns
# A tibble: 1 × 4
name type description breakdown_values
<chr> <chr> <chr> <list>
1 bs32 Spatial Geographic Component <tibble [19 × 2]>
# A tibble: 416 × 7
name description universe nhgis_code sequence dataset_name n_variables
<chr> <chr> <chr> <chr> <int> <chr> <int>
1 B01001 Sex by Age Total p… ASNQ 1 2019_2023_A… 49
2 B01002 Median Age by … Total p… ASNR 2 2019_2023_A… 3
3 B01002A Median Age by … People … ASNS 3 2019_2023_A… 3
4 B01002B Median Age by … People … ASNT 4 2019_2023_A… 3
5 B01002C Median Age by … People … ASNU 5 2019_2023_A… 3
6 B01002D Median Age by … People … ASNV 6 2019_2023_A… 3
7 B01002E Median Age by … People … ASNW 7 2019_2023_A… 3
8 B01002F Median Age by … People … ASNX 8 2019_2023_A… 3
9 B01002G Median Age by … People … ASNY 9 2019_2023_A… 3
10 B01002H Median Age by … White a… ASNZ 10 2019_2023_A… 3
# ℹ 406 more rows
# A tibble: 52 × 7
name description universe nhgis_code sequence dataset_name n_variables
<chr> <chr> <chr> <chr> <int> <chr> <int>
1 B25003 Tenure Occupie… ASS9 200 2019_2023_A… 3
2 B25003A Tenure (White … Occupie… ASTA 201 2019_2023_A… 3
3 B25003B Tenure (Black … Occupie… ASTB 202 2019_2023_A… 3
4 B25003C Tenure (Americ… Occupie… ASTC 203 2019_2023_A… 3
5 B25003D Tenure (Asian … Occupie… ASTD 204 2019_2023_A… 3
6 B25003E Tenure (Native… Occupie… ASTE 205 2019_2023_A… 3
7 B25003F Tenure (Some O… Occupie… ASTF 206 2019_2023_A… 3
8 B25003G Tenure (Two or… Occupie… ASTG 207 2019_2023_A… 3
9 B25003H Tenure (White … Occupie… ASTH 208 2019_2023_A… 3
10 B25003I Tenure (Hispan… Occupie… ASTI 209 2019_2023_A… 3
# ℹ 42 more rows
# Template call listing the arguments of define_extract_nhgis(); replace the
# placeholder values with your own before running it.
# NOTE(review): per the ipumsr documentation an NHGIS extract needs at least
# one of `datasets`, `time_series_tables` or `shapefiles` -- confirm before use.
define_extract_nhgis(
  description = "",                       # name of the extract
  datasets = NULL,                        # chosen datasets, see ds_spec() below
  time_series_tables = NULL,              # time series (only if applicable)
  shapefiles = NULL,                      # include if you are going to map
  geographic_extents = NULL,
  breakdown_and_data_type_layout = NULL,
  tst_layout = NULL,
  data_format = NULL
)
[1] "GISJOIN" "YEAR" "STUSAB" "REGIONA" "DIVISIONA" "STATE"
[7] "STATEA" "COUNTY" "COUNTYA" "COUSUBA" "PLACEA" "TRACTA"
[13] "BLKGRPA" "CONCITA" "AIANHHA" "RES_ONLYA" "TRUSTA" "AIHHTLI"
[19] "AITSA" "ANRCA" "CBSAA" "CSAA" "METDIVA" "CNECTA"
[25] "NECTADIV" "UAA" "CDCURRA" "SLDUA" "SLDLA" "ZCTA5A"
[31] "SUBMCDA" "SDELMA" "SDSECA" "SDUNIA" "PCI" "PUMAA"
[37] "GEO_ID" "BTTRA" "BTBGA" "TL_GEO_ID" "NAME_E" "ASTBE001"
[43] "ASTBE002" "ASTBE003" "ASTHE001" "ASTHE002" "ASTHE003" "ASTIE001"
[49] "ASTIE002" "ASTIE003" "NAME_M" "ASTBM001" "ASTBM002" "ASTBM003"
[55] "ASTHM001" "ASTHM002" "ASTHM003" "ASTIM001" "ASTIM002" "ASTIM003"
# Derive homeownership ratios from the tenure tables and keep one state.
# The ASTH*, ASTB* and ASTI* columns carry the NHGIS codes shown in the
# B25003 table listing for B25003H (White), B25003B (Black) and
# B25003I (Hispanic).
# NOTE(review): assumes the *E001 columns are total occupied units and the
# *E002 columns are owner-occupied units -- confirm against the codebook.
nhgis_data <- nhgis_data |>
  mutate(
    # Homeownership proportion per group: second estimate over the first.
    ho_prop_white = ASTHE002 / ASTHE001,
    ho_prop_black = ASTBE002 / ASTBE001,
    ho_prop_hispanic = ASTIE002 / ASTIE001
  ) |>
  mutate(
    # White proportion relative to each other group. A zero or missing
    # denominator yields Inf, NaN or NA rather than an error.
    ho_ratio_white_black = ho_prop_white / ho_prop_black,
    ho_ratio_white_hispanic = ho_prop_white / ho_prop_hispanic
  ) |>
  # Keep the identifier columns plus the ratios; the intermediate ho_prop_*
  # columns are dropped here.
  select(
    YEAR, STATEA, STATE, COUNTYA, COUNTY, GISJOIN,
    starts_with("ho_ratio")
  ) |>
  # NOTE(review): 42 is presumably Pennsylvania's state code (the map further
  # down is titled "PA") -- confirm.
  filter(STATEA == 42)
Some observations were lost in the join (3155 observations in the shape file).
See `join_failures(...)` for more details.
You can also use:
- ipums_shape_left_join()
- ipums_shape_right_join()
- ipums_shape_full_join()

sf is a package to manage spatial formats.

mapgl()
The downloaded binary packages are in
/var/folders/9m/2jtnq8_d5hb5vykhx9m_0d080000gp/T//RtmpUyn6Tc/downloaded_packages
# Add a fill layer coloured by the White/Hispanic homeownership ratio to
# `pa_map`, then attach a matching legend.
# The scale endpoints and colours are defined once so the layer and the legend
# always describe the same scale.
ho_ratio_range <- c(1, 3)
ho_ratio_colors <- c("lightyellow", "darkorange")

pa_map |>
  add_fill_layer(
    id = "homeowner",
    source = for_mapping,
    fill_color = interpolate(
      column = "ho_ratio_white_hispanic",
      values = ho_ratio_range,
      stops = ho_ratio_colors,
      na_color = "lightgrey"
    ),
    fill_opacity = 0.7
  ) |>
  add_legend(
    "Homeownership ratio white/hispanic, PA, 2023",
    values = ho_ratio_range,
    colors = ho_ratio_colors
  )